rm(list=ls())

setwd("C:/Users/nkostyuk3/Dropbox (GaTech)/Dissertation/Diffusion/Organizations/Publications/MoD/JPR_November2022/replication_files")
library(dplyr)
library(survival)
library(ggplot2)

#####################
#loading data:
load("data_predictions.RData")



# filter year to only 2019-2020:
data_pred2_2019_2020 = data_pred %>%
  filter(YEAR>=2019) 


# main model:
form0 <- paste0(
  'Surv(tstart,tstop, ADOPTION)~',
  'weights_DEFENSEmp_short+',
  'weights_DEFENSE_long+',
  'weights_adversary_short+',
  'weights_adversary_long+',
  'LOG_GDP_PERCAP_sc+', 
  'DEMOCRACY+NATO+prestige+',
  'cluster(ISO_ADOPTING)')
form0
fitted_mod <- coxph(as.formula(form0), data=data_pred, x=TRUE)
fitted_mod
summary(fitted_mod)


library(survival)
data_pred2_2019_2020$mod_pred = predict(fitted_mod, newdata=data_pred2_2019_2020, type='lp')

# 2019:
adopters_2019 = data_pred2_2019_2020%>%
  filter(YEAR==2019)%>%
  mutate(RANK=rank(mod_pred))%>% # -mod_pred - descending order
  filter(ADOPTION==1)%>%  #| ISO_ADOPTING == 'BWA' | ISO_ADOPTING == 'BHR')%>%
  mutate(LABEL = paste0(ISO_ADOPTING, '(', RANK, ')'))


# 2020:
adopters_2020 = data_pred2_2019_2020%>%
  filter(YEAR==2020)%>%
  mutate(RANK=rank(mod_pred))%>% # -mod_pred - descending order
  filter(ADOPTION==1)%>%
  mutate(LABEL = paste0(ISO_ADOPTING, '(', RANK, ')'))
adopters_2020


##############################
# overall model performance: 
##############################

# roc score:
#install.packages('pROC')
library(pROC)

auc(ADOPTION~mod_pred, data=data_pred2_2019_2020) # 0.6912
ci.auc(ADOPTION~mod_pred, data=data_pred2_2019_2020, method='b')
# 95% CI: 0.5265-0.8445 (2000 stratified bootstrap replicates)

# creating ggplot with both predictions:
adopters19 = adopters_2019 %>%
  dplyr::select(ISO_ADOPTING, YEAR, LABEL, RANK, DATE)%>%
  mutate(PERC = case_when(
    ISO_ADOPTING =='MRT' ~ ceiling(RANK/68*100), 
    ISO_ADOPTING =='ARM' ~ ceiling(RANK/68*100),
    ISO_ADOPTING =='LBN' ~ ceiling(RANK/66*100), 
    ISO_ADOPTING =='GHA' ~ ceiling(RANK/65*100), 
    ISO_ADOPTING =='URY' ~ ceiling(RANK/64*100)
    )
    ) %>%
  mutate(LABEL = paste0(ISO_ADOPTING, '(', PERC, '%)'))
adopters_2019 %>% 
  select(ISO_ADOPTING, DATE) %>% 
  arrange(DATE)
adopters20 = adopters_2020 %>%
  dplyr::select(ISO_ADOPTING, YEAR, LABEL, RANK, DATE)%>%
  mutate(PERC = case_when(
    ISO_ADOPTING =='BWA' ~ ceiling(RANK/63*100), 
    ISO_ADOPTING =='BHR' ~ ceiling(RANK/62*100) 
    )
    ) %>%
  mutate(LABEL = paste0(ISO_ADOPTING, '(', PERC, '%)'))
adopters_2020 %>% 
  select(ISO_ADOPTING, DATE) %>% 
  arrange(DATE)
adopters_all = adopters19 %>%
  full_join(adopters20) 
adopters_all

adopters_all %>%
  ggplot(aes(x=as.Date(DATE), y=PERC))+
  geom_point()+
  scale_x_date(date_labels = "%Y-%m",
               date_breaks = "3 months")+
  geom_text(aes(label=LABEL), 
            vjust = -0.5, size = 3.5, position = position_dodge(width = 6))+
  geom_hline(yintercept=50, linetype="dashed", 
             color = "gray", size=1)+
  theme_bw()+
  # theme(legend.background = element_rect(size=0.5, linetype="solid",
  #                                        colour ="black"))+
  theme(legend.position = "bottom",
        legend.box = "vertical",
        legend.title = element_blank())+
  labs(x = "Date",y='Percentile Rank of Relative Risk for Developing \n Military Cybercapacity (out of 100%)')+
  # complete blank backgroun
  theme(panel.background = element_blank(),
        axis.line = element_line(colour = "black"))
#ggsave('Latex/MoD/Figures/predictions_v2.pdf')
